##################################################################
####State Capacity, Insurgency, and Civil War - NTL Validation####
##################################################################

library(DAAG)

#Set working directory
setwd("~/Google Drive/NSLC/Final Dofiles/")


###########
###Ghana###
###########
gha.dat <- read.csv("GhanaStateCapacity.csv")
#Create a log version of nighttime light for 2012
gha.dat$lnNL12sum <- log(gha.dat$NL12sum+1)
#Create a log version of government employess
gha.dat$lnGovEmp <- log(gha.dat$Public..Government.+1)
#Correlation
cor(gha.dat$lnNL12sum, gha.dat$lnGovEmp)
#Plot
pdf("ghana1.pdf")
plot(gha.dat$lnNL12sum, gha.dat$lnGovEmp, xlab="Ln(NL12)", ylab="Ln(Government Employees)",
     sub="Correlation = 0.651")
mylm <- lm(gha.dat$lnGovEmp ~ gha.dat$lnNL12sum)
abline(mylm)
dev.off()

###Predict using k-fold CV (10 folds)
gha.s <- data.frame(gha.dat[,c("lnGovEmp", "lnNL12sum")])
p2.cv <- CVlm(data=gha.s, form.lm=formula(lnGovEmp ~ lnNL12sum), m=10)
p2.out <- p2.cv[c("cvpred","lnGovEmp")]
cor(p2.out$cvpred, p2.out$lnGovEmp)
#Plot
pdf("ghana1pred.pdf")
plot(p2.out$cvpred, p2.out$lnGovEmp,
     xlab="Predicted Values Based on ln(NL12)", 
     ylab="Ln(Government Employees)",
     sub="Correlation = 0.634")
abline(lm(p2.out$lnGovEmp~p2.out$cvpred))
dev.off()

############
###Brazil###
############
br <- read.csv("braziltax.csv")
#Create a log version of nighttime light for 2012
br$logNL2012sum <- log(br$NL2012sum)
#Create a log version of contributions
br$lnContributions2012 <- log(br$Contributions2012+1)
#Correlation
cor(br$lnContributions2012, br$logNL2012sum)
#Plot
pdf("br1.pdf")
plot(br$logNL2012sum, br$lnContributions2012,
     xlab="Ln(NL12)", 
     ylab="Ln(Contribution by District)",
     sub="Correlation = 0.739")
abline(lm(br$lnContributions2012~br$logNL2012sum))
dev.off()

###Predict using k-fold CV (10 folds)
brs <- data.frame(br[,c("lnContributions2012", "logNL2012sum")])
p1.cv <- CVlm(data=brs, form.lm=formula(lnContributions2012 ~ logNL2012sum), m=10)
p1.out <- p1.cv[c("cvpred","lnContributions2012")]
cor(p1.out$cvpred, p1.out$lnContributions2012)
#Plot
pdf("br1pred.pdf")
plot(p1.out$cvpred, p1.out$lnContributions2012,
     xlab="Predicted Values Based on ln(NL12)", 
     ylab="Ln(Contribution by District)",
     sub="Correlation = 0.697")
abline(lm(p1.out$lnContributions2012~p1.out$cvpred))
dev.off()


###########
###India###
###########
ind <- read.csv("india.csv")
#Create a log version of number of health centers
ind$logPHS <- log(ind$NoPHCs+1)
#Create a log version of nighttime light for 2012
ind$logNL2012sum <- log(ind$NL2012sum+1)
#Correlation
cor(ind$logNL2012sum, ind$logPHS)
#Plot
pdf("ind1.pdf")
plot(ind$logNL2012sum, ind$logPHS,
     xlab="Ln(NL12)", 
     ylab="Ln(Public Health Centers)",
     sub="Correlation = 0.506")
abline(lm(ind$logPHS~ind$logNL2012sum))
dev.off()

###Predict using k-fold CV (10 folds)
ind.s <- data.frame(ind[,c("logPHS", "logNL2012sum")])
p3.cv <- CVlm(data=ind.s, form.lm=formula(logPHS ~ logNL2012sum), m=10)
p3.out <- p3.cv[c("cvpred","logPHS")]
cor(p3.out$cvpred, p3.out$logPHS)
#Plot
pdf("ind1pred.pdf")
plot(p3.out$cvpred, p3.out$logPHS,
     xlab="Predicted Values Based on ln(NL12)", 
     ylab="Ln(Public Health Centers)",
     sub="Correlation = 0.478")
abline(lm(p3.out$logPHS~p3.out$cvpred))
dev.off()


#############
###Ecuador###
#############
ecuad <- read.csv("EcuadorCantonTaxNL2012.csv")
#Create a log version of taxation revenue
ecuad$logTax <- log(ecuad$Tax2012Peso+1)
#Create a log version of nighttime light for 2012
ecuad$logNL2012mean <- log(ecuad$NL2012Mean+1)
#Subset data for correlation and prediction
ecuad.s <- ecuad[,c("logTax", "logNL2012mean")]
ecuad.s <- na.omit(ecuad.s)
#Correlation
cor(ecuad.s$logNL2012mean, ecuad.s$logTax)
#Plot
pdf("ecuad1.pdf")
plot(ecuad.s$logNL2012mean, ecuad.s$logTax,
     xlab="Ln(NL12)", 
     ylab="Ln(Tax Revenue)",
     sub="Correlation = 0.442")
abline(lm(ecuad.s$logTax~ecuad.s$logNL2012mean))
dev.off()

###Predict using k-fold CV (10 folds)
p4.cv <- CVlm(data=ecuad.s, form.lm=formula(logTax ~ logNL2012mean), m=10)
p4.out <- p4.cv[c("cvpred","logTax")]
cor(p4.out$cvpred, p4.out$logTax)
#Plot
pdf("ecuad1pred.pdf")
plot(p4.out$cvpred, p4.out$logTax,
     xlab="Predicted Values Based on ln(NL12)", 
     ylab="Ln(Tax Revenue)",
     sub="Correlation = 0.399")
abline(lm(p4.out$logTax~p4.out$cvpred))
dev.off()

